# -*- coding: utf-8 -*-
import scrapy
import time
from mySpider.items import BaiSearchItem
import json

class BaiduSpider(scrapy.Spider):
    """Scrape Baidu search results for a fixed query and follow pagination.

    Each organic result on a page is emitted as a ``BaiSearchItem`` with
    ``title``, ``url`` and ``content`` fields.  After a page is processed the
    spider schedules the last ``#page .n`` link, up to ``max_pages``
    follow-up requests.
    """

    name = 'baidu'
    allowed_domains = ['www.baidu.com']
    start_urls = ['https://www.baidu.com/s?wd=python']
    # Number of follow-up page requests scheduled so far.  Kept on the class
    # (not the instance), so it is shared by every instance in the process.
    number = 0
    # Upper bound on follow-up page requests issued after the start URL.
    max_pages = 10
    # Throttle through the downloader instead of calling time.sleep() inside
    # the callback: sleeping there blocks the reactor and stalls every other
    # in-flight request.
    custom_settings = {'DOWNLOAD_DELAY': 1}

    def parse(self, response):
        """Yield one item per search result, then a request for the next page.

        Args:
            response: The downloaded search-results page.

        Yields:
            ``BaiSearchItem`` objects for every result whose ``data-tools``
            metadata can be decoded, followed by at most one
            ``scrapy.Request`` for the next results page.
        """
        for div in response.css('#content_left .result'):
            # The title and URL live in a JSON blob on the tools widget.
            raw_tools = div.css('.c-tools::attr("data-tools")').extract_first()
            if not raw_tools:
                # Result block without the tools widget: nothing to extract.
                continue
            try:
                tools = json.loads(raw_tools)
                title, url = tools['title'], tools['url']
            except (ValueError, KeyError, TypeError):
                # Malformed or unexpected metadata should not abort the page.
                self.logger.warning(
                    'Skipping result with unusable data-tools on %s',
                    response.url,
                )
                continue

            item = BaiSearchItem()
            item['title'] = title
            item['url'] = url
            # The abstract is split across nested nodes; join the text parts.
            item['content'] = ''.join(div.css('.c-abstract *::text').extract())
            yield item

        # Pagination: the last ".n" link is taken as the "next page" link.
        # NOTE(review): if a page only has a "previous" link this would go
        # backwards -- confirm against the live markup.
        nav_links = response.css('#page .n::attr("href")').extract()
        if not nav_links:
            return
        if BaiduSpider.number >= self.max_pages:
            return
        BaiduSpider.number += 1
        yield scrapy.Request(
            url=response.urljoin(nav_links[-1]),
            callback=self.parse,
        )
